import json
import re

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent so the site serves the page instead of rejecting
# the scraper as a bot.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
}

# Index page that lists every pal in a single HTML table.
url = 'http://www.teandy.com/pal/palindex.html'


# Fetch the pal index table and serialize it.
def get_json_data():
    """Scrape the pal index page and return the table as a JSON string.

    Returns:
        A JSON-encoded list of rows. The first row is the header
        ['编号', '名称', '属性', '工作', '食量', '掉落']; each following row is
        [id, name, attributes, works, eat, drops], where multi-valued
        fields are joined with '#'.
    """
    data = [['编号', '名称', '属性', '工作', '食量', '掉落']]

    # Context manager releases the connection even if parsing below raises
    # (the original only called resp.close() on the happy path).
    with requests.get(url, headers=headers) as resp:
        resp.encoding = 'utf-8'
        soup = BeautifulSoup(resp.text, 'html.parser')

    for row in soup.find_all('tr')[1:]:  # skip the header row
        # Hoisted: the cell list was re-queried for every column before.
        tds = row.find_all('td')

        # id / name: first cell holds "id-name"; split on the first '-'
        # only, so a name that itself contains a dash stays intact.
        # ('pal_id' instead of 'id' to avoid shadowing the builtin.)
        id_name = _clean(re.findall(r'>(.*)<', str(tds[0]))[0])
        pal_id, name = id_name.split('-', 1)

        # attributes: cell contains "[attr]" entries separated by <br/>.
        attributes = _clean(re.findall(r'td>(.*)</td>', str(tds[2]))[0])
        attribute_data = [re.findall(r'\[(.*)]', part)[0]
                          for part in attributes.split('<br/>')]

        # work suitabilities
        works = _clean(re.findall(r'>(.*)<', str(tds[5]))[0])
        work_lst = works.split('<br/>')

        # food requirement
        eat = _clean(re.findall(r'>(.*)<', str(tds[6]))[0])

        # drops
        falls = _clean(re.findall(r'td>(.*)</td>', str(tds[8]))[0])
        fall_lst = falls.split('<br/>')

        data.append([pal_id, name, '#'.join(attribute_data),
                     '#'.join(work_lst), eat, '#'.join(fall_lst)])

    return json.dumps(data, ensure_ascii=False)


def _clean(text):
    """Strip every zero-width space (U+200B) the site embeds in cell text.

    The original removed only runs of exactly five ZWSPs; removing each
    occurrence individually is strictly more robust.
    """
    return text.replace('\u200b', '')